from langchain_text_splitters import RecursiveCharacterTextSplitter


def split_document(text, chunk_size=100, chunk_overlap=10):
    """Split ``text`` into chunks using a ``RecursiveCharacterTextSplitter``.

    Args:
        text: The string to split.
        chunk_size: Size of each chunk, measured by ``len`` (characters).
        chunk_overlap: Number of characters overlapping between chunks.

    Returns:
        Whatever ``RecursiveCharacterTextSplitter.split_text`` returns for
        ``text`` (the resulting chunks).
    """
    # Separators the splitter should prefer, in the order listed.
    separator_priority = ["\n\n", "\n", " ", ""]
    splitter_options = dict(
        separators=separator_priority,
        length_function=len,  # function used to compute the length of a piece
        chunk_overlap=chunk_overlap,
        chunk_size=chunk_size,
    )
    return RecursiveCharacterTextSplitter(**splitter_options).split_text(text)


if __name__ == '__main__':
    # split_document() requires the text to split; calling it with no
    # arguments raises TypeError, so run a small demo on sample text instead.
    sample_text = (
        "Text splitting breaks a long document into smaller pieces.\n\n"
        "Each piece stays below a configured size, and neighbouring pieces "
        "share a few characters so context is not lost at the boundaries.\n"
        "This sample is long enough to produce more than one chunk."
    )
    for index, chunk in enumerate(split_document(sample_text)):
        print(f"[{index}] {chunk!r}")

